{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "1e62db0a-2680-472f-bf59-4d660bc4afdb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   Year STATE    State_NA State_Code  Real_GSP_2017_$M  State_UR  Population  \\\n",
      "0  2006    AK      Alaska    2006_AK           47546.2       6.6    675302.0   \n",
      "1  2007    AK      Alaska    2007_AK           50126.7       6.3    680300.0   \n",
      "2  2008    AK      Alaska    2008_AK           49926.6       6.7    687455.0   \n",
      "3  2009    AK      Alaska    2009_AK           54951.9       7.7    698895.0   \n",
      "4  2006    CA  California    2006_CA         2120435.4       4.9  36021202.0   \n",
      "\n",
      "   Any_merger?  IOU_Merger  GSP_Per_Capita  ...  Cust_com      KWH_com  \\\n",
      "0            1           0    70407.314061  ...     45690  5141.916539   \n",
      "1            1           0    73683.227988  ...     46047  5119.000000   \n",
      "2            1           0    72625.262744  ...     47032  5052.000000   \n",
      "3            1           0    78626.832357  ...     47337  5002.000000   \n",
      "4            0           0    58866.314345  ...   1751882  5767.847045   \n",
      "\n",
      "   CPKWH_com    Bill_com  IOU_Indus  Mark_Indus  Cust_indus     KWH_indus  \\\n",
      "0  11.927242  613.288831          5          23        1383  74887.141480   \n",
      "1  12.186942  623.810000          5          22        1349  85481.000000   \n",
      "2  13.635446  688.850000          4          22        1350  82985.000000   \n",
      "3  14.460000  723.130000          4          23        1367  79946.000000   \n",
      "4  12.895206  743.775741          6          40       79036  53763.362054   \n",
      "\n",
      "   CPKWH_indus    Bill_indus  \n",
      "0    11.540786   8642.564473  \n",
      "1    12.632483  10798.370000  \n",
      "2    14.173092  11761.490000  \n",
      "3    13.150000  10510.540000  \n",
      "4    10.090068   5424.760025  \n",
      "\n",
      "[5 rows x 42 columns]\n",
      "Imputation completed and saved to Excel.\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.impute import SimpleImputer\n",
    "\n",
    "# Load the Excel file\n",
    "file_path = r\"C:\\Users\\Sarth\\OneDrive\\Merger Analysis of Duke Energy and Progress Energy\\Consolidated Useful data\\Consolidated Data File.xlsx\"\n",
    "sheet_name = \"Main\"\n",
    "df = pd.read_excel(file_path, sheet_name=sheet_name)\n",
    "\n",
    "# Inspect the data\n",
    "print(df.head())\n",
    "\n",
    "# Define the columns\n",
    "national_columns = ['Coal_Cost_US', 'Fuel_Cost_US', 'Gas_Cost_US', 'Nuclear_Cost_US']\n",
    "state_columns = ['Cost_Coal_MBTU', 'Cost_FuelOil_MBTU', 'Cost_NaturalGas_MBTU', 'Cost_Nuclear_MBTU']\n",
    "\n",
    "# Impute missing values in national averages with mean\n",
    "imputer = SimpleImputer(strategy='mean')\n",
    "df[national_columns] = imputer.fit_transform(df[national_columns])\n",
    "\n",
    "# Function to impute state-level costs using national averages\n",
    "def impute_state_cost(state_column, national_column):\n",
    "    # Separate the data into known and missing values\n",
    "    known_data = df[[state_column, national_column]].dropna()\n",
    "    missing_data = df[df[state_column].isnull()]\n",
    "\n",
    "    # Train the regression model\n",
    "    model = LinearRegression()\n",
    "    model.fit(known_data[national_column].values.reshape(-1, 1), known_data[state_column])\n",
    "\n",
    "    # Predict the missing values\n",
    "    predicted_values = model.predict(missing_data[national_column].values.reshape(-1, 1))\n",
    "    df.loc[df[state_column].isnull(), state_column] = predicted_values\n",
    "\n",
    "# Impute each state-level cost\n",
    "for state_col, national_col in zip(state_columns, national_columns):\n",
    "    impute_state_cost(state_col, national_col)\n",
    "\n",
    "# Save the imputed data back to Excel\n",
    "output_file_path = r\"C:\\Users\\Sarth\\OneDrive\\Merger Analysis of Duke Energy and Progress Energy\\Consolidated Useful data\\Consolidated Data File Imputed.xlsx\"\n",
    "df.to_excel(output_file_path, sheet_name=sheet_name, index=False)\n",
    "\n",
    "print(\"Imputation completed and saved to Excel.\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b4637e74-781d-4700-b577-81e749d3ed6e",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
